# This file contains the R code that was used to implement the estimators and functionals in 
#"Transformations In Hazard Rate Estimation For Heavy-Tailed Data", IFAC proceesings, Springer, (2013)
# As a fully working example the code herein can estimate the hazard rate from the Danish Fire Loss (DFL) data . 
# More general use of the estimates is also feasible by supplying the corresponding data set.  
#Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function.


"Epanechnikov"<-
  function(x){
    #Epanechikov kernel, square root of 5 is taken as 2.236068 (to save comp. time).
    ifelse(abs(x)< 2.236068, (3/4)*(1-((x^2)/5 ))/2.236068, ifelse(abs(x)>2.236068, 0,0))
  }

"IntEpanechnikov"<-function(x)
{
  ifelse(x< -2.236068, 0, ifelse(x> 2.236068, 1, .5- (2.236068*x*(x^2-15))/100))
}

"HigherOrder"<-function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1) * (3-x^2)/2))}#, ifelse(abs(x) > 4, 0, 0)) }







"Thre1"<- #Watson - Leadbetter hazard rate (see Phd)
  function(xin, xout, kfun, bandfactor)
  {
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    h<-bandfactor*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile.
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)* 1/h
    arg3
  }

"iThre1"<- #Watson - Leadbetter integrated hazard rate (cum. haz. est.)
  function(xin, xout, kfun, bandfactor)
  {
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    h<-bandfactor*diff(quantile(xin.use, c(.25, .75)))
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)
    arg3 
  }

"SimpsonInt"<- function(xin, h)
{   #xin = data, h=distance of the points at which function is evaluated
  n<-length(xin)
  nn<-1:n
  int0<-xin[1]
  int2n<-xin[n]
  inteven<-xin[seq(3,n-1,by=2)]
  intodd<-xin[seq(2,n-2,by=2)]
  sumodd<-sum(intodd)
  sumeven<-sum(inteven)
  res<-(h/3)*(int0+4*sumodd+2*sumeven+int2n)
  res
}

"PilotWidth1"<- function(sample)
{
  #returns Silverman's default width 1.06 A n^(-1/5)
  n<-length(sample)
  StD<-sd(sample)
  IqR<-diff(quantile(sample, c(.25, .75)))
  tmp<-min(StD, IqR/1.34)
  DefWidth<- 1.06*tmp*n^(-1/5)
  DefWidth
}


"LambdaHat"<-
  #Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964))
  # -order statistics version. 
  function(xin, xout, h, kfun){
    n<- length(xin)
    n1<-1:n
    nn<-length(xout)
    xin.use<-sort(xin)
    #cat(xin.use)
    hazard.rate<-vector("numeric",nn)
    for(i in 1:nn){
      hazard.rate[i]<-(1/h)*sum((kfun((xout[i]-xin.use)/h))/(n-n1+1))
    }
    hazard.rate
  }


"BandsIkde"<-function(xin)
  #returns default bandwidths for the variable bandwidth distribution function estimate: Swanepoel and Van Graan (2007)
  #d1 is the pilot bandwidth and d2 the adaptive
{
  IqR<-diff(quantile(xin, c(.25, .75)))
  
  StD<-sd(xin)
  tmp<-min(StD, IqR/1.34)
  n<-length(xin)
  
  d2<- (375*sqrt(3) / (28*pi))^(1/7) *tmp^(4/7) * n^(1/7)
  d1 <-3.9 * tmp * n^(1/3)
  c(d1,d2)
}

"kde"<- function(xin, xout, h, kfun)
{
  n<- length(xin)
  xin<-sort(xin)
  arg1<-(sapply(xout, "-", xin))/h	#xin.use
  arg2<-kfun(arg1)
  arg3<-colSums(arg2)
  arg3/n
}

"iikde"<- function(xin, xout, kfun, h)
{
  n<- length(xin)
  arg1<-(sapply(xout, "-", xin))/h	
  arg2<-kfun(arg1)
  arg3<-colSums(arg2)
  arg3/n
}


"Ikde2"<- function(xin, xout,   ikfun)
{
  n<- length(xin)
  Bands<-BandsIkde(xin)
  PilotBand<-Bands[1]/1000
  AdaptiveBand<-Bands[2]/500
  xin<-sort(xin)
  kernel<-iikde(xin, xin, ikfun, PilotBand) 
  kernelxout<- iikde(xin, xout, ikfun, PilotBand) 
  
  arg1<-(sapply(kernelxout, "-", kernel))/AdaptiveBand	#xin.use
  arg2<-ikfun(arg1)
  arg3<-colSums(arg2)
  arg3/n
}


"TransIkde"<-
  #Adaptive variable bandwidth distribution function estimator
  function(xin, xout, kfun=Epanechnikov)
  {
    n<- length(xin)
    xin<-sort(xin)
    nn<-length(xout)
    Bands<-BandsIkde(xin)
    PilotBand<-Bands[1]
    AdaptiveBand<-Bands[2]	
    kernel<-kde(xin, xin, PilotBand, kfun) 
    kernelxout<- kde(xin, xout, PilotBand, kfun) 
    vdfe<-sapply(1:nn, function(i, kernel, kernelxout, AdaptiveBand, kfun)
    { 							 
      sum(   kfun(  (kernelxout[i]-kernel)/AdaptiveBand	))				
    }, kernel, kernelxout, AdaptiveBand, kfun)	
    vdfe/n
  }



"ikde"<- function(xin, xout, kfun, bandfactor)
{
  n<- length(xin)
  h<-bandfactor*diff(quantile(xin, c(.25, .75)))
  arg1<-(sapply(xout, "-", xin))/h	
  arg2<-kfun(arg1)
  arg3<-colSums(arg2)
  arg3/n
}



"Thre2.1"<-
  #Transformed hazard rate estimator (2nd stage) using nonparametric transformation -log(1-\hat F(x))
  function(xin, xout, h2, kfun, ikfun, bandfactor)
  {
    n<-length(xin)
    n1<-1:n
    nn<-length(xout)
    xin.use<-sort(xin)
    GofX<- -logb(1- Ikde2(xin.use, xout, ikfun), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xout, igaussian) #pchisq(xout, 12), base=exp(1)) #
    Tsample<- -logb(1- Ikde2(xin.use, xin.use,  ikfun), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xin.use, igaussian)	#pchisq(xin.use, 12), base=exp(1))#
  
    GofXdashed<- Thre1(xin.use, xout,  kfun, bandfactor)  #ChiHaz(xout, 10)#						
    arg1<-(sapply(GofX, "-", Tsample))/h2
    arg2<-kfun(arg1)/(n-n1+1)
    arg3<-colSums(arg2)
    TransEstimate<- arg3*GofXdashed/h2 					
   
    TransEstimate
    
  }


"VarHre"<-
  #Adaptive variable bandwidth hazard rate estimator
  function(xin, xout, h2, bandfactor, kfun=Epanechnikov)
  {
    n<- length(xin)
    nn<-length(xout)
    n1<-1:n
    xin.use<-sort(xin)
    kernel<-Thre1(xin.use, xin.use, kfun, bandfactor) #Pilot estimate (W-L)
    SqrtKernel<-sqrt(kernel)
    vhre<-sapply(1:nn, function(i, xin.use, xout, h2, kfun, n, n1, SqrtKernel)
    { 
      sum( (SqrtKernel *  kfun( ((xout[i]-xin.use)/h2) * SqrtKernel)	)/(n-n1+1))							
    }, xin.use, xout, h2, kfun, n, n1, SqrtKernel)	
    vhre/h2
  }

mydata<-unlist(read.csv("DFL.csv"))

#mydata<-mydata[,2]
#mydata<-sample(mydata, 10000)
xout<-seq(min(mydata), max(mydata), length=200)
xout<-quantile(xout, c(0.05, 0.95))
xout<-seq(min(xout), max(xout), length=200)
#x<-seq(0, 5,length=100)
#true.hazard<-hfrechet(x, 1,1,0)
#Mat1<-matrix(nrow=100, ncol=20)
#Mat2<-matrix(nrow=100, ncol=20)
#for(i in 1:20)
#{
#x1<- rfrechet(200, 1, 1,0)
#arg1<-Thre2(x1, x, Epanechnikov, IntEpanechnikov, .2)
#Mat1[,i]<-arg1
huse<-PilotWidth1(mydata)
arg2<-Thre2.1(mydata,xout,huse, Epanechnikov, IntEpanechnikov, 0.2)# VarHre(mydata,xout,huse,0.2,Epanechnikov)# LambdaHat(mydata, xout, huse, Epanechnikov)  # Thre2.1(mydata, xout, Epanechnikov, IntEpanechnikov, .2)
#Mat2[,i]<-arg2
#}
#arg1use<-rowMeans(arg1)
#arg2use<-rowMeans(arg2)

plot(xout, arg2, type="l", xlab="x", ylab="hazard rate")
#lines(x, arg1use, col=2)
#lines(x, arg2use, lty=3, col=3)
#plot(x, pfrechet(x,1,1,0), type="l")
#lines(x, Ikde2(x1, x, IntEpanechnikov), col=2)
#Ikde2(x1, x, IntEpanechnikov)
#lines(x, iikde(x1, x, IntEpanechnikov, BandsIkde(x1)[1]/1020))